###############################################################################
###   Raz (2024).                 													                ###
###   Soil Heterogeneity, Social Learning, and the Formation of Close-knit  ###
###   Communities.                                                          ###
###-------------------------------------------------------------------------###
###   Make Figures       								                                    ###
###############################################################################

# Author: Itzchak Tzachi Raz
# Last Revised: 11/30/2024

# Note: Some input data files could not be made publicly available.  


# Preambles ---------------------------------------------------------------

#---------------------------------
# --- Load packages  
#---------------------------------

library(dplyr)
library(ggplot2)
library(data.table)
library(ipumsr)
library(lfe)
library(sf)
library(tmap)
library(latex2exp)
library(stringr)
library(cowplot)
library(scales)
library(grid)
library(conleyreg)


#---------------------------------
# --- Paths
#---------------------------------

scriptPath <- dirname(rstudioapi::getSourceEditorContext()$path)   
if(dir.exists(scriptPath) == FALSE) {
  print("ERROR in script path")
}
BasicPath <- dirname(scriptPath)
DataPath <- paste0(BasicPath, "/data")
TempDataPath <- paste0(DataPath, "/temp")
PaperFiguresPath <- paste0(BasicPath, "/output/paper/figures")
AppendixFiguresPath <- paste0(BasicPath, "/output/appendix/figures")

#---------------------------------
# -- Parameters
#---------------------------------

my.SmoothLocCtrls <- "Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord"
my.colClasses <- c("STATE_TERR"="factor", "YEAR"="factor")
mile2km <- 1.609
my.ipums_api_key <- "59cba10d8a5da536fc06b59da3338441c1ac4f12b60b349d7e399698" # Change to your key


# Functions ---------------------------------------------------------------

#--- Function to plot maps ---
plot_map <- function(y = NA, my.data = NA, my.col = NA, my.title = NA, 
                     outfile = NA) {
  
  if(is.na(y)) {
    message("ERROR. Must specify year")
    break
  }
  if(is.na(my.data)) {
    message("ERROR. Must specify dataset")
    break
  }
  if(is.na(my.col)) {
    message("ERROR. Must specify yariable to plot")
    break
  }
  if(is.na(outfile)) {
    message("ERROR. Must specify path to save map")
    break
  }
  
  
  if(floor(y/10)==y/10) {
    map_y <- y
  } else {
    map_y <- 10*floor(y/10)
  }

  FilePath <- paste0(TempDataPath, "/shapefiles/counties/tl2000_us_county_", map_y)
  Counties <- st_read(FilePath) %>%
    st_simplify(dTolerance = 100) %>%
    select(-SHAPE_AREA) %>%
    left_join(eval(parse(text = my.data))) %>%
    filter(!STATENAM %in% c("Alaska Territory", "Hawaii Territory")) %>%
    filter(!STATENAM %in% c("Alaska", "Hawaii")) 
  
  FilePath <- paste0(TempDataPath, "/shapefiles/states/tl2000_us_state_", map_y)
  States <- st_read(FilePath) %>%
    st_simplify(dTolerance = 1000)
  
  my.palette = c("#FFFFE5", "#fff9cd", "#ffe79c",
                 "#ffcc66", "#ffa139", "#f17316",
                 "#e55a0a", "#bd4005", "#852f06", 
                 "#662506")
  
  counties_label <- paste0("US Counties, ", y)
  states_label <- paste0("US States, ", y)
  my.map <- tm_shape(Counties, unit = "mi") + tm_polygons(col = my.col,
                                                          style = "quantile", n=10,
                                                          border.alpha = 0.5,
                                                          border.col = "grey",
                                                          title = my.title,
                                                          #palette = "Browns",
                                                          palette = my.palette) +
    tm_add_legend(type="line", col="grey", alpha = 0.5, labels = counties_label, lwd = 1) +
    tm_shape(States, unit = "mi") + tm_borders(col = "black") + 
    tm_add_legend(type="line", col="black", labels = states_label, lwd = 1) +
    tm_compass(type = "8star", position = c("0.75", "0.85"), size = 3.5) +
    tm_scale_bar(breaks = c(0, 100, 200), position = c("0.88", "0.01"), size = 1.5) +
    tm_layout(frame = FALSE,
              legend.outside = TRUE,
              legend.outside.position = "right",
              legend.outside.size = .2,
              legend.text.size = .84,
              legend.title.size = 1,
              legend.title.fontface = "bold"
              
    )
  
  # saving map
  tmap_save(my.map, outfile, height = 20, width = 27, units = "cm")
}


#--- Function to plot the DD coefficients and save as figure ---
plotDD <- function(out, outfile, u_limit = NA, d_limit = NA) {
  
  if(!is.na(u_limit)) {
    out <- out %>% as.data.table()
    out[c95u > u_limit, ':=' (c95u_extra = paste0(round(100*(c95u/u_limit-1), 0), "% \u2191"),
                              c95u = u_limit)]
  }
  if(!is.na(d_limit)) {
    out <- out %>% as.data.table()
    out[c95d < d_limit, ':=' (c95d_extra = paste0(round(100*(c95d/d_limit-1), 0), "% \u2193"),
                              c95d = d_limit)]
  }
  
  my.plot <- ggplot(data = out, aes(x=t, y=beta)) + theme_bw() + 
    geom_point(size=1.25, color = "darkblue") + 
    geom_line(color = "darkblue") +
    geom_linerange(aes(x=t,  ymin=c95d, ymax=c95u), size=0.5, color = "darkblue") + 
    geom_hline(aes(yintercept = 0), col = "grey25") + 
    geom_vline(aes(xintercept = 0), col = "grey40", linetype = "dashed") + 
    labs(x = "Birth year relative to move year", y = TeX('$\\beta_{\\ b }$')) +
    scale_x_continuous(breaks = c(-5:7), 
                       labels = c("-5+", "-4", "-3", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7+")) + 
    theme(axis.text=element_text(size=7),
          axis.title=element_text(size=8),
          axis.title.y = element_text(angle = 0, vjust=0.5, face="bold"),
          panel.border = element_blank(), 
          panel.background = element_rect(fill = "gray95"),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_line(color = "gray"),
          panel.grid.minor = element_blank(), 
          axis.line = element_line(colour = "black"))
  
  if(!is.na(u_limit)) {
    my.plot <- my.plot +
      geom_text(aes(label=c95u_extra, y=u_limit*.95), hjust = 1.1, size = 1.5)
  }
  if(!is.na(d_limit)) {
    my.plot <- my.plot +
      geom_text(aes(label=c95d_extra, y=d_limit*.95), hjust = 1.1, size = 1.5)
  }
  
  if(abs(round(d_limit))>0) {
    my.by = 1
  } else {
    my.by=0.025
  }
  
  if(!is.na(u_limit) & !is.na(d_limit)) {
    my.plot <- my.plot + scale_y_continuous(breaks = seq.int(-30, 30, by = my.by), limits = c(d_limit,u_limit), expand = c(0,0))
    
  } else {
    my.plot <- my.plot + scale_y_continuous(breaks = seq.int(-30, 30, by = my.by))
  }
  
  ggsave(filename = outfile, 
         plot = my.plot,
         height = 6, width = 10, units = "cm")
  
}

#--- Function to plot inference robustness ---
clusterSErobustness <- function(dep.var.name = NA, indep.var.name = NA,
                                FEs = NA, ClusterVarList = NA, reg.data = NA,
                                outfile = NA, robust = FALSE, 
                                countyCluster = FALSE, stateCluster = FALSE,
                                conleySEs = FALSE, conleyPARs = NA) {
  
  out <- lapply(as.list(ClusterVarList), function(x) 
  {felm(formula(paste0(dep.var.name, " ~ ", indep.var.name, " + ", 
                       my.SmoothLocCtrls, " | ", FEs, " | 0 | ", x)),
        data = reg.data)})
  
  beta <- out[[1]]$beta[1]
  
  out.cluster <- NA
  
  for (i in 1:length(out)) {
    out.cluster <- rbind(out.cluster,
                         data.frame(cluster_var = ClusterVarList[i],
                                    se = out[[i]]$cse[1],
                                    cn1 = beta - 2.576*out[[i]]$cse[1],
                                    cp1 = beta + 2.576*out[[i]]$cse[1],
                                    cn5 = beta -1.960*out[[i]]$cse[1],
                                    cp5 = beta +1.960*out[[i]]$cse[1],
                                    cn10 = beta -1.645*out[[i]]$cse[1],
                                    cp10 = beta +1.645*out[[i]]$cse[1]) %>%
                           mutate(p_value = ifelse(((cn1>0)&(cp1>0))|((cn1<0)&(cp1<0)), 1,
                                                   ifelse(((cn5>0)&(cp5>0))|((cn5<0)&(cp5<0)), 2,
                                                          ifelse(((cn10>0)&(cp10>0))|((cn10<0)&(cp10<0)), 3,4))),
                                  num_cluster_grp = length(unique(out[[i]]$clustervar[[1]])),
                                  x = as.numeric(str_extract(cluster_var, "[0-9]+"))))
    
  }
  
  out.cluster <- out.cluster %>%
    mutate(p_value = as.factor(p_value)) %>%
    filter(!is.na(cn1))
  
  SE90 =  abs(beta / 1.645)
  SE95 =  abs(beta / 1.96)
  SE99 =  abs(beta / 2.576)
  
  my.plot <- ggplot(out.cluster, aes(x=x, y=se)) + theme_bw() + 
    geom_point(aes(color = p_value), size=1.5) +
    scale_colour_manual(values = c("1" = "blue3", "2" = "green3", "3" = "darkgreen", "4" = "red"),
                        labels = c("<0.01", "<0.05", "<0.1", ">0.1"),
                        name = "P-value") +
    geom_text(aes(label=num_cluster_grp), nudge_x=0.05, vjust = 2, size=2) +
    geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = SE90 , ymax = Inf), color = NA, alpha = 0.02) +
    geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = SE95 , ymax = Inf), color = NA, alpha = 0.02) +
    geom_rect(aes(xmin = -Inf, xmax = Inf, ymin = SE99 , ymax = Inf), color = NA, alpha = 0.02) +
    labs(y = "Standard Error", x = "Cluster size (miles square)") +
    theme(plot.caption = element_text(hjust = 0),
          axis.text=element_text(size=7),
          axis.title=element_text(size=8, face="bold"),
          legend.title = element_text(size=7, face="bold"),
          legend.text = element_text(size=6),
          legend.spacing.y = unit(0, "cm"),
          legend.key.height = unit(-0.5, "cm")
    )
  
  
  my.values <- NULL
  my.colors <- NULL
  my.names <- NULL
  
  if(robust) {
    y_robust <- out[[1]]$rse[1]
    my.plot <- my.plot + 
      geom_hline(aes(yintercept = y_robust, linetype = "Robust"), col = "darkgreen", lwd = 0.25)
    my.values <- c(my.values, 1)
    my.colors <- c(my.colors, "darkgreen")
    my.names <- c(my.names, "Robust")
  }
  
  if(countyCluster) {
    y_county <- felm(formula(paste0(dep.var.name, " ~ ", indep.var.name, " + ", 
                                    my.SmoothLocCtrls, " | ", FEs, " | 0 | GISJOIN")),
                     data = reg.data)$cse[1]
    my.plot <- my.plot + 
      geom_hline(aes(yintercept = y_county, linetype = "County"), col = "darkblue", lwd = 0.25) 
    my.values <- c(my.values, 2)
    my.colors <- c(my.colors, "darkblue")
    my.names <- c(my.names, "County")
  }
  
  if(stateCluster) {
    y_state <- felm(formula(paste0(dep.var.name, " ~ ", indep.var.name, " + ", 
                                   my.SmoothLocCtrls, " | ", FEs, " | 0 | STATE_TERR")),
                    data = reg.data)$cse[1]
    my.plot <- my.plot + 
      geom_hline(aes(yintercept = y_state, linetype = "State"), col = "red", lwd = 0.25) 
    my.values <- c(my.values, 3)
    my.colors <- c(my.colors, "red")
    my.names <- c(my.names, "State")
    
  }
  
  if(conleySEs) {
    conleyPARs[[2]]$data <- conleyPARs[[2]]$data[!is.na(SHI25)]
    conleyPARs[[2]]$data <- conleyPARs[[2]]$data[YEAR != ""]
    conleyPARs[[2]]$data[, ':=' (year = as.double(as.character(YEAR)),
                                 countyID = substr(GISJOIN, 2, 9) %>% as.numeric(), 
                                 lat = YCoord %>% as.numeric(),
                                 lon = XCoord %>% as.numeric())]
    conleyPARs[[2]]$data[, STATE_TERRxYEAR := .GRP, by = list(STATE_TERR, YEAR)]
    
    my.conleySEs <- do.call(conleyreg, conleyPARs[[2]]) 
    
    conleyPARs[[2]]$data[, ':=' (year = NULL,
                                 countyID = NULL, 
                                 lat = NULL,
                                 lon = NULL,
                                 STATE_TERRxYEAR = NULL)]
    
    my.plot <- my.plot + 
      geom_hline(aes(yintercept = my.conleySEs[conleyPARs[[1]],2], linetype = "Conley"), col = "black", lwd = 0.25) 
    my.values <- c(my.values, 4)
    my.colors <- c(my.colors, "black")
    my.names <- c(my.names, "Conley")
    
    
  }
  
  if(any(robust, countyCluster, stateCluster, conleySEs)) {
    my.plot <- my.plot + 
      scale_linetype_manual(name = "Other", values = my.values, breaks =my.names,
                            guide = guide_legend(override.aes = list(color = my.colors)))
  }
  
  
  ggsave(filename = outfile, plot = my.plot,
         width = 10, height = 6, units = "cm")
  
}


#--- Function to creates bin scatter plots ---
OLSscatterPlot <- function(my.data = NA, my.dep.var = NA, my.indep.var = NA,
                           my.ctrls = NA, my.cluster.var = NA, 
                           outfile = NA, my.y_lab = NA, my.x_lab = NA) {
  
  # Filter data
  my.data <- my.data[!is.na(eval(parse(text = my.dep.var))) & !is.na(eval(parse(text = my.indep.var)))]
  
  # Residuals 
  my.data[, dep.var.re := felm(as.formula(paste0(my.dep.var, " ~ ", my.ctrls)), 
                               data = my.data)$residuals %>% as.numeric()]
  
  my.data[, indep.var.re := felm(as.formula(paste0(my.indep.var, " ~ ", my.ctrls)), 
                                 data = my.data)$residuals %>% as.numeric()]
  
  # Beta and SE for the uncontrolled regression
  
  b <- round(summary(felm(eval(parse(text = my.dep.var))  ~  eval(parse(text = my.indep.var))
                          | 1 , data = my.data))$coefficients[2,1],
             3)
  
  if(!is.na(my.cluster.var)) {
    se <-  round(summary(felm(as.formula(paste0(my.dep.var, " ~ ", my.indep.var, " | 1 | 0 | ", my.cluster.var))
                              , data = my.data))$coefficients[2,2],
                 3)
    
  } else {
    se <-  round(felm(as.formula(paste0(my.dep.var, " ~ ", my.indep.var, " | 1 "))
                              , data = my.data)$se[2],
                 3)
  }
  
  
  # Plot uncontrolled regression
  fit1 <- lm(eval(parse(text = my.dep.var)) ~ eval(parse(text = my.indep.var)), data = my.data)
  if(!is.na(my.cluster.var)) {
    t1 <- jtools::make_predictions(model = fit1, pred = my.indep.var, interval = TRUE, 
                                   robust = "HC0", cluster = my.cluster.var, data = my.data)
  } else {
    t1 <- jtools::make_predictions(model = fit1, pred = my.indep.var, interval = TRUE, 
                                   robust = "HC0", data = my.data)
  }
  setnames(t1, new = c(my.indep.var, my.dep.var, "ymin", "ymax"))
  
  if(!round(summary(fit1)$coefficients[2,1],3) == b) {
    message("ERROR. Estimation error.")
    break
  }
  
  
  t <- copy(my.data)
  
  t[, bin := cut(eval(parse(text = my.indep.var)), 
                 seq(min(eval(parse(text = paste0("t$", my.indep.var)))), max(eval(parse(text = paste0("t$", my.indep.var)))), 
                     (max(eval(parse(text = paste0("t$", my.indep.var))))-min(eval(parse(text = paste0("t$", my.indep.var)))))/24))]
  t[, ':=' (y_bin = mean(eval(parse(text = my.dep.var)), na.rm = T),
            x_bin = mean(eval(parse(text = my.indep.var)), na.rm = T),
            n_bin = .N),
    by = bin]
  
  
  # Place the annotation box. Upward trend: left. Downward trend: right
  annotation_x <- ifelse(b<0, 0.9, 0.1)
  
  ggplot(t)   +
    geom_point(aes(y=y_bin , x = x_bin, size = n_bin), color='darkblue') +
    scale_size(range = c(0.25, 5)) +
    geom_line(data = t1, aes(eval(parse(text = my.indep.var)), eval(parse(text = my.dep.var))), 
              color = "black", lwd = 0.375) +
    labs(y = my.y_lab, x = my.x_lab) +
    annotation_custom(grid::textGrob(label = paste0("Slope: ", b, "\nSE: ", se),
                                     gp = gpar(fontsize = 7),
                                     x = unit(annotation_x, "npc"),
                                     y = unit(0.9, "npc"))) +
    theme(axis.text=element_text(size=6),
          axis.title=element_text(size=7),
          axis.title.y = element_text(face="bold"),
          axis.title.x = element_text(face="bold"),
          panel.border = element_blank(),
          panel.background = element_rect(fill = "gray95"),
          panel.grid.major.x = element_line(color = "gray"),
          panel.grid.major.y = element_line(color = "gray"),
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          legend.position="none")
  
  ggsave(filename = outfile, width = 10, height = 7.5, units = "cm")
  
  
  
  # Beta and SE from the controlled regression
  
  b <- round(summary(felm(as.formula(paste0(my.dep.var, " ~ ", my.indep.var, " + ",
                                            my.ctrls)), data = my.data))$coefficients[1,1],
             3)
  
  if(!is.na(my.cluster.var)) {
    se <-  round(summary(felm(as.formula(paste0(my.dep.var, " ~ ", my.indep.var, " + ",
                                                my.ctrls, " | 0 | ", my.cluster.var)), 
                              data = my.data))$coefficients[1,2], 
                 3)
    
  } else {
    se <-  round(felm(as.formula(paste0(my.dep.var, " ~ ", my.indep.var, " + ",
                                                my.ctrls)), 
                              data = my.data)$se,
                 3)
  }
  
  
  # Plot controlled regression
  fit1 <- lm(dep.var.re ~ indep.var.re, data = my.data)
  t1 <- jtools::make_predictions(model = fit1, pred = "indep.var.re", interval = TRUE, 
                                 robust = "HC0", data = my.data)
  
  if(!round(summary(fit1)$coefficients[2,1],3) == b) {
    message("ERROR. Estimation error.")
    break
  }
  
  
  t <- copy(my.data)
  
  t[, bin := cut(indep.var.re, seq(min(t$indep.var.re), max(t$indep.var.re), (max(t$indep.var.re)-min(t$indep.var.re))/24))]
  t[, ':=' (y_bin = mean(dep.var.re, na.rm = T),
            x_bin = mean(indep.var.re, na.rm = T),
            n_bin = .N),
    by = bin]
  
  # Place the annotation box. Upward trend: left. Downward trend: right
  annotation_x <- ifelse(b<0, 0.9, 0.1)
  
  ggplot(t)   +
    geom_point(aes(y=y_bin , x = x_bin, size = n_bin), color='darkblue') +
    scale_size(range = c(0.25, 3.75)) +
    geom_line(data = t1, aes(indep.var.re, dep.var.re), color = "black", lwd = 0.375) +
    labs(y = paste0(my.y_lab, " | X"), x =  paste0(my.x_lab, " | X")) +
    annotation_custom(grid::textGrob(label = paste0("Slope: ", b, "\nSE: ", se),
                                     gp = gpar(fontsize = 7),
                                     x = unit(annotation_x, "npc"),
                                     y = unit(0.9, "npc"))) +
    xlim(min(t$indep.var.re), max(t$indep.var.re)) +
    theme(axis.text=element_text(size=6),
          axis.title=element_text(size=7),
          axis.title.y = element_text(face="bold"),
          axis.title.x = element_text(face="bold"),
          panel.border = element_blank(),
          panel.background = element_rect(fill = "gray95"),
          panel.grid.major.x = element_line(color = "gray"),
          panel.grid.major.y = element_line(color = "gray"),
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          legend.position="none")
  
  ggsave(filename = paste0(str_split(outfile, ".jpg")[[1]][1], "_X.jpg"), 
         width = 10, height = 7.5, units = "cm")
  
}


# --- Function to plot effects over time ---
plotEffectTime <- function(dep.var.name, yearsList, outfile) {
  
  i <- 1
  for (y in yearsList) {
    assign(paste0("m",i), felm(formula(paste0(dep.var.name, " ~ SHI25 + ", my.SmoothLocCtrls,
                                              " | STATE_TERR | 0 | grid100m")),
                               data = CountyLevelData[YEAR==y]))
    i <- i + 1
  }
  
  out.plot <- data.frame(my.axis = yearsList,
                         beta = lapply(1:(i-1), function(x) eval(parse(text=paste0("m",x)))["beta"][[1]][1]) %>% unlist(),
                         SE = lapply(1:(i-1), function(x) eval(parse(text=paste0("m",x)))["cse"][[1]][1]) %>% unlist()) %>%
    mutate( cn1 = beta - 2.576*SE,
            cp1 = beta + 2.576*SE,
            cn5 = beta -1.960*SE,
            cp5 = beta +1.960*SE,
            cn10 = beta -1.645*SE,
            cp10 = beta +1.645*SE)
  
  CI5 = geom_linerange(aes(x=my.axis,  ymin=cn5, ymax=cp5), size=0.5, color = "darkblue")
  
  my.plot <- ggplot(data = out.plot, aes(x=my.axis, y=beta)) + theme_bw() + geom_point(color = "darkblue", size = 2) +
    geom_line(color = "darkblue") +
    CI5 + scale_y_continuous() +
    geom_hline(aes(yintercept = 0), col = "grey25") + 
    scale_x_continuous(breaks = c(1850, 1860, 1870, 1880, 1890, 1900, 1910, 1920, 1930, 1940),
                       labels = c("1850", "1860", "1870", "1880", "1890",
                                  "1900", "1910", "1920", "1930", "1940")) + 
    labs(title = NULL, 
         caption = NULL,
         y = expression(beta),
         x= "Year") +
    theme(
      axis.title.y = element_text(vjust = 0.5, angle=0, face="bold"),
      axis.text=element_text(size=7),
      axis.title=element_text(size=8),
      plot.caption=element_text(hjust = 0),
      legend.position="none",
      panel.border = element_blank(), 
      panel.background = element_rect(fill = "gray95"),
      panel.grid.major = element_line(color = "gray"),
      panel.grid.minor = element_blank(), 
      axis.line = element_line(colour = "black"))
  
  ggsave(filename = outfile, 
         plot = my.plot,
         height = 6, width = 10, units = "cm")
  
}

#--- Function to plot coefficients of covariates on all main outcomes ---
plotGeoCoefs <- function(my.geo = NA, my.geo.title = NA, last.geo = FALSE) {
  
  my_names <- c("SHI25", "Elevation", "Slope", "Flow_acc", "Precipitation", "Temperature",
                "RiverDensity", "maxval", "SHAPE_AREA", "dist2NavRiver",
                "dist2Shoreline", "dist2Lakes")
  
  stdGeo <- copy(CountyLevelData[, .SD, .SDcols = c("GISJOIN", "YEAR", my_names)])
  stdGeo[, (paste0("z",my_names)) := lapply(.SD, function(x) as.vector(scale(x))),
         by = "YEAR", .SDcols = my_names]
  stdGeo[, (my_names) := NULL]
  stdGeo[, ':=' (GISJOIN = as.character(GISJOIN),
                 YEAR = as.character(YEAR))]
  
  my.data <- copy(CountyLevelData)
  my.data[, ':=' (GISJOIN = as.character(GISJOIN),
                  YEAR = as.character(YEAR))]
  my.data <- merge(my.data, stdGeo, by = c("GISJOIN", "YEAR"))
  my.data[, zLNI_county_Native := scale(LNI_county_Native),by = "YEAR"]
  my.data[, zICM_Native := scale(ICM_Native), by = "YEAR"]
  my.data[, zKPR_Native := scale(KPR_Native), by = "YEAR"]
  my.data[, zMKP_Native := scale(MKP_Native), by = "YEAR"]
  my.data[, ':=' (GISJOIN = as.factor(GISJOIN),
                  YEAR = as.factor(YEAR))]
  
  my.MigrantsData <- copy(MigrantsData)
  my.MigrantsData[, ':=' (GISJOIN = as.character(GISJOIN),
                          YEAR = as.character(YEAR))]
  my.MigrantsData <- merge(my.MigrantsData, stdGeo, by = c("GISJOIN", "YEAR"))
  my.MigrantsData[, zLNI_state_Native := scale(LNI_state_Native), by = "YEAR"]
  my.MigrantsData[, ':=' (GISJOIN = as.factor(GISJOIN),
                          YEAR = as.factor(YEAR))]
  
  my.reg <- summary(felm(zLNI_state_Native ~
                           Post:eval(parse(text = paste0("z",my.geo)))
                         | familyFE + Rtime | 0 | GISJOIN, data = my.MigrantsData))
  
  out <- data.table(my.geo = my.geo,
                    my.model = "DD: LNI",
                    my.coef = my.reg$coefficients[1,1],
                    my.SE = my.reg$coefficients[1,2],
                    c95u = my.reg$coefficients[1,1] + 1.96*my.reg$coefficients[1,2],
                    c95d = my.reg$coefficients[1,1] - 1.96*my.reg$coefficients[1,2],
                    c90u = my.reg$coefficients[1,1] + 1.65*my.reg$coefficients[1,2],
                    c90d = my.reg$coefficients[1,1] - 1.65*my.reg$coefficients[1,2])
  
  my.reg <- summary(felm(zLNI_state_Native ~
                           Post:farmerDad:eval(parse(text = paste0("z",my.geo)))
                         + Post:farmerDad + Post:eval(parse(text = paste0("z",my.geo)))
                         | familyFE + Rtime | 0 | GISJOIN, data = my.MigrantsData))
  
  out <- rbindlist(list(out, data.table(my.geo = my.geo, 
                                        my.model = "DDD: LNI", 
                                        my.coef = my.reg$coefficients[3,1], 
                                        my.SE = my.reg$coefficients[3,2],
                                        c95u = my.reg$coefficients[3,1] + 1.96*my.reg$coefficients[3,2],
                                        c95d = my.reg$coefficients[3,1] - 1.96*my.reg$coefficients[3,2],
                                        c90u = my.reg$coefficients[3,1] + 1.65*my.reg$coefficients[3,2],
                                        c90d = my.reg$coefficients[3,1] - 1.65*my.reg$coefficients[3,2])))
  
  #--- Migrants' Children ---
  my.MigrantsChildren <- copy(MigrantsChildren)
  my.MigrantsChildren[, ':=' (GISJOIN = as.character(GISJOIN),
                              YEAR = as.character(YEAR))]
  my.MigrantsChildren <- merge(my.MigrantsChildren, stdGeo, by = c("GISJOIN", "YEAR"))
  my.MigrantsChildren[, zpercent := scale(percent), by = "YEAR"]
  my.MigrantsChildren[, zICM_d := scale(ICM_d), by = "YEAR"]
  my.MigrantsChildren[, ':=' (GISJOIN = as.factor(GISJOIN),
                              YEAR = as.factor(YEAR),
                              bpl = as.factor(bpl),
                              statefip_y = as.factor(statefip_y),
                              Rtime = as.factor(Rtime),
                              highLNI = as.factor(highLNI),
                              ICM = as.factor(ICM)
  )]
  
  my.reg <- summary(felm(zpercent ~ zSHI25 
                         + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                         + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                         + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | 
                           bpl:statefip_y:YEAR:Rtime:highLNI:ICM | 0 | GISJOIN, 
                         data = my.MigrantsChildren))
  
  
  out2 <- data.table(my.geo = my.geo,
                     my.model = "Migrants' children: Staying",
                     my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                     my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]])
  
  out <- rbindlist(list(out, out2))
  
  my.reg <- summary(felm(zpercent ~ zSHI25 
                         + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                         + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                         + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | 
                           bpl:statefip_y:YEAR:Rtime:highLNI:ICM | 0 | GISJOIN, 
                         data = my.MigrantsChildren[farmerDad==1]))
  
  
  out2 <- data.table(my.geo = my.geo,
                     my.model = "Migrant-farmers' children: Staying",
                     my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                     my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]])
  
  out <- rbindlist(list(out, out2))
  
  my.reg <- summary(felm(zICM_d ~ zSHI25 
                         + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                         + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                         + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | 
                           bpl:statefip_y:YEAR:Rtime:highLNI:ICM | 0 | GISJOIN, 
                         data = my.MigrantsChildren))
  
  
  out2 <- data.table(my.geo = my.geo,
                     my.model = "Migrants' children: ICM",
                     my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                     my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]])
  
  out <- rbindlist(list(out, out2))
  
  my.reg <- summary(felm(zICM_d ~ zSHI25 
                         + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                         + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                         + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | 
                           bpl:statefip_y:YEAR:Rtime:highLNI:ICM | 0 | GISJOIN, 
                         data = my.MigrantsChildren[farmerDad==1]))
  
  
  out2 <- data.table(my.geo = my.geo,
                     my.model = "Migrant-farmers' children: ICM",
                     my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                     my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]],
                     c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                       my.reg$coefficients[[paste0("z",my.geo),2]])
  
  out <- rbindlist(list(out, out2))
  
  #--- County level ---
  my.vars <- c("zLNI_county_Native", "zICM_Native", "TNI_Native",
               "RHI", "zMKP_Native", "zKPR_Native", "SFTI_Native")
  my.labels <- c("County-level: LNI", "County-level: ICM", "County-level: TNI",
                 "County-level: RHI", "County-level: MKP", "County-level: KPR", "County-level: SFTI")
  
  for (i in 1:length(my.vars)) {
    my.reg <- summary(felm(eval(parse(text = my.vars[i])) ~  eval(parse(text = paste0("z", my.geo))) 
                           | 1 | 0 | grid100m, data = my.data))
    
    
    
    out2 <- data.table(my.geo = my.geo,
                       my.model = paste0(my.labels[i], ", No Controls"),
                       my.coef = my.reg$coefficients[2,1],
                       my.SE = my.reg$coefficients[2,2],
                       c95u = my.reg$coefficients[2,1] + 1.96*my.reg$coefficients[2,2],
                       c95d = my.reg$coefficients[2,1] - 1.96*my.reg$coefficients[2,2],
                       c90u = my.reg$coefficients[2,1] + 1.65*my.reg$coefficients[2,2],
                       c90d = my.reg$coefficients[2,1] - 1.65*my.reg$coefficients[2,2])
    
    out <- rbindlist(list(out, out2))
    
    my.reg <- summary(felm(eval(parse(text = my.vars[i])) ~  eval(parse(text = paste0("z", my.geo))) 
                           | STATE_TERR:YEAR | 0 | grid100m, data = my.data))
    
    out2 <- data.table(my.geo = my.geo,
                       my.model = paste0(my.labels[i], ", State X Year FEs"),
                       my.coef = my.reg$coefficients[1,1],
                       my.SE = my.reg$coefficients[1,2],
                       c95u = my.reg$coefficients[1,1] + 1.96*my.reg$coefficients[1,2],
                       c95d = my.reg$coefficients[1,1] - 1.96*my.reg$coefficients[1,2],
                       c90u = my.reg$coefficients[1,1] + 1.65*my.reg$coefficients[1,2],
                       c90d = my.reg$coefficients[1,1] - 1.65*my.reg$coefficients[1,2])
    out <- rbindlist(list(out, out2))
    
    my.reg <- summary(felm(eval(parse(text = my.vars[i])) ~  zSHI25 
                           + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                           + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                           + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord 
                           | STATE_TERR:YEAR | 0 | grid100m, data = my.data))
    
    
    out2 <- data.table(my.geo = my.geo,
                       my.model = paste0(my.labels[i], ", Baseline Controls"),
                       my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                       my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                       c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                         my.reg$coefficients[[paste0("z",my.geo),2]])
    out <- rbindlist(list(out, out2))
    
    my.reg <- summary(felm(eval(parse(text = my.vars[i])) ~  zSHI25 
                           + zElevation + zSlope + zFlow_acc + zPrecipitation + zTemperature + zRiverDensity 
                           + zmaxval + zSHAPE_AREA + zdist2NavRiver + zdist2Shoreline + zdist2Lakes
                           + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord 
                           + AlfalfaSI + CottonSI +  MaizeSI + OatSI + RyeSI + SugercaneSI + Sweet_potatoSI + TobaccoSI +  WheatSI + White_potatoSI 
                           + ElevationSD + SlopeSD + Flow_accSD + PrecipitationSD + TemperatureSD + AlfalfaSI_SD + CottonSI_SD + MaizeSI_SD + OatSI_SD + RyeSI_SD + SugercaneSI_SD + Sweet_potatoSI_SD + TobaccoSI_SD +  WheatSI_SD + White_potatoSI_SD
                           | STATE_TERR:YEAR | 0 | grid100m, data = my.data))
    
    
    out2 <- data.table(my.geo = my.geo,
                       my.model = paste0(my.labels[i], ", All Controls"),
                       my.coef = my.reg$coefficients[[paste0("z",my.geo),1]],
                       my.SE = my.reg$coefficients[[paste0("z",my.geo),2]],
                       c95u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.96*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c95d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.96*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c90u = my.reg$coefficients[[paste0("z",my.geo),1]] + 1.65*
                         my.reg$coefficients[[paste0("z",my.geo),2]],
                       c90d = my.reg$coefficients[[paste0("z",my.geo),1]] - 1.65*
                         my.reg$coefficients[[paste0("z",my.geo),2]])
    
    
    out <- rbindlist(list(out, out2))
    
  }
  
  # Coefficients plot
  my.plot1 <- ggplot(out) + 
    geom_pointrange(aes(x=my.model, y=my.coef, ymin=c95d, ymax=c95u), 
                    position=position_dodge(width=0.375), fatten = 0.25, size=.375, linewidth = 0.375, color = "darkblue") +
    geom_pointrange(aes(x=my.model, y=my.coef, ymin=c90d, ymax=c90u), 
                    position=position_dodge(width=0.375), fatten = 0.25, size=.375, linewidth = 0.75, color = "darkblue") + 
    geom_hline(aes(yintercept=0), lwd = .375) + 
    geom_hline(aes(yintercept=median(out$my.coef)), color = "#555555", lwd = .375) + 
    geom_vline(aes(xintercept=2.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=4.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=6.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=10.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=14.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=18.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=22.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=26.5), linetype='dashed', color = "black", lwd = .25) + 
    geom_vline(aes(xintercept=30.5), linetype='dashed', color = "black", lwd = .25) + 
    labs(title = NULL, 
         caption = NULL,
         y = paste0("Coefficient on\n", my.geo.title),
         x= "") +
    scale_colour_discrete(guide = FALSE) + 
    scale_x_discrete(limits = out$my.model, 
                     labels = out$my.model) +
    scale_y_continuous(labels = label_number(accuracy = 0.01)) +
    theme(
      axis.title.y = element_text(vjust = 0.5, angle=90, face="bold"),
      axis.text.y=element_text(size=7),
      axis.title=element_text(size=6),
      plot.caption=element_text(hjust = 0),
      legend.text.align = 0,
      legend.title=element_blank(),
      legend.position="right",    
      panel.border = element_rect(colour = "black", fill = NA, size = 1),
      panel.background = element_rect(fill = "gray95"),
      panel.grid.major = element_line(color = "gray"),
      panel.grid.minor = element_blank(), 
      axis.line = element_line(colour = "black"),
      plot.margin = unit(c(5.5, 0, 5.5, 5.5), "points")
    )
  
  if(last.geo) {
    my.plot1 <- my.plot1 + theme(axis.text.x=element_text(size=4, angle=90, hjust=0.95, vjust=0.2))
  } else {
    my.plot1 <- my.plot1 + theme(axis.text.x=element_blank())
    
  }
  
  # Coefficients density plot
  my.plot2 <- ggplot(out, aes(x=my.coef)) + 
    geom_histogram(aes(y=..density..), colour="darkblue", fill="lightblue", alpha = 0.5, binwidth = 0.021)+
    geom_density(alpha=.5, fill="darkgrey") +
    geom_vline(aes(xintercept=0), lwd = .375) + 
    geom_vline(aes(xintercept=median(out$my.coef)), color = "#555555", lwd = .375) + 
    xlim(layer_scales(my.plot1)$y$get_limits()) + 
    coord_flip() +
    theme(panel.border = element_rect(colour = "black", fill = NA, size = 1), 
          panel.background = element_rect(fill = "gray95"), 
          axis.line = element_line(colour = "black"),
          panel.grid = element_blank(),
          axis.title = element_blank(),
          axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.margin = unit(c(5.5, 5.5, 5.5, 0), "points")
    )

  # Merge figures
  plot_grid(my.plot1, my.plot2, align = "h", ncol = 2, 
            rel_heights = c(1, 1/3),
            rel_widths = c(0.9, 0.1))
  
  # Save
  outfile = paste0(AppendixFiguresPath, "/figure_C_9_", my.geo, ".jpg")
  if(last.geo) {
    ggsave(filename = outfile, width = 14, height = 5.2, units = "cm")
  } else {
    ggsave(filename = outfile, width = 14, height = 2.8, units = "cm")
  }
  

}

# -------------------------------------------------------------------------
# ----------                   Download Shapefiles               ----------
# -------------------------------------------------------------------------

# Define IPUMS api key
set_ipums_api_key(my.ipums_api_key, save = TRUE, overwrite = TRUE)

#--- Counties, year 2000, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, Counties, year 2000, TL 2000",
  shapefiles = "us_county_2000_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)

# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_2000.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/counties/tl2000_us_county_2000/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_2000.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

#--- States, year 2000, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, States, year 2000, TL 2000",
  shapefiles = "us_state_2000_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)

# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_2000.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/states/tl2000_us_state_2000/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_2000.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

#--- Counties, year 1940, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, Counties, year 1940, TL 2000",
  shapefiles = "us_county_1940_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)


# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_1940.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/counties/tl2000_us_county_1940/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_1940.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

#--- States, year 1940, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, States, year 1940, TL 2000",
  shapefiles = "us_state_1940_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)

# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_1940.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/states/tl2000_us_state_1940/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_1940.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

#--- Counties, year 1930, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, Counties, year 1930, TL 2000",
  shapefiles = "us_county_1930_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)


# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_1930.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/counties/tl2000_us_county_1930/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_county_1930.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

#--- States, year 1930, TL 2000 ---

# Define your NHGIS extract request
extract <- define_extract_nhgis(
  description = "Raz (2024) replication: Shapefile, States, year 1930, TL 2000",
  shapefiles = "us_state_1930_tl2000"  # Specify the desired shapefile
)


# Submit the extract request
submitted_extract <- submit_extract(extract)

# Wait 1 min for the data extract to be prepared
Sys.sleep(60)

# Download the completed extract when ready
download_extract(submitted_extract, download_dir = TempDataPath, overwrite = TRUE)

# Unzip
unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"), 
      exdir = TempDataPath)

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape.zip"))

unzip(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
             "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_1930.zip"), 
      exdir = paste0(TempDataPath, "/shapefiles/states/tl2000_us_state_1930/"))

file.remove(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/",
                   "nhgis0", submitted_extract$number, "_shapefile_tl2000_us_state_1930.zip"))
unlink(paste0(TempDataPath, "/nhgis0", submitted_extract$number, "_shape/"), recursive = TRUE)

# -------------------------------------------------------------------------
# ----------                    MAIN FIGURES                     ----------
# -------------------------------------------------------------------------

# Figure 1 ----------------------------------------------------------------
# County-Level Soil Heterogeneity Index

FilePath <- paste0(DataPath, "/final/LongRunData.csv")
SHIdata <- fread(FilePath)
SHIdata <- SHIdata[, .SD, .SDcols = c("GISJOIN", "STATE_TERR", "SHI25")]

SHIdata$SHI_resSTATE <- felm(SHI25 ~ 1 | STATE_TERR, data = SHIdata)$residuals %>%
  as.numeric()

plot_map(y = 2000, my.data = "SHIdata", my.col = "SHI_resSTATE",
         my.title <- paste0("\nSoil Heterogeneity Index\n (residualizing state FE)\n"),
         outfile = paste0(PaperFiguresPath, "/figure_1.jpg"))


# Figure 2 ----------------------------------------------------------------
# County-Level Local Name Index, 1940

y <- 1940
FilePath <- paste0(DataPath, "/final/CountyLevelData.csv")
CountyLevelData <- fread(FilePath, colClasses = my.colClasses)

plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "LNI_county_Native",
         my.title <- paste0("\n     Local Name\n     Index, ", y, "\n"),
         outfile = paste0(PaperFiguresPath, "/figure_2.jpg"))

# Figure 3 ----------------------------------------------------------------
# Migrants’ Response to Soil Heterogeneity

FilePath <- paste0(DataPath, "/final/migrants/MigrantsNative_2.csv")
MigrantsData <- fread(FilePath)

#--- Panel A: All households ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData)
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(PaperFiguresPath, "/figure_3A.jpg"),
       u_limit = 5, d_limit = -10)

#--- Panel B: Farmers ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  
            Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, 
          data = MigrantsData[farmerDad==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(PaperFiguresPath, "/figure_3B.jpg"),
       u_limit = 5, d_limit = -10)


#--- Panel C: Non-Farmers ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  
            Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, 
          data = MigrantsData[farmerDad==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(PaperFiguresPath, "/figure_3C.jpg"),
       u_limit = 5, d_limit = -10)


#--- Panel D: DDD ---
m1 <-felm(LNI_state_Native ~ 
            Rtime5mp:farmerDad + Rtime4m:farmerDad + Rtime3m:farmerDad + Rtime2m:farmerDad +  Rtime1m:farmerDad +  
            Rtime0:farmerDad + Rtime1:farmerDad + Rtime2:farmerDad + Rtime3:farmerDad + Rtime4:farmerDad + Rtime5:farmerDad + 
            Rtime6:farmerDad + Rtime7p:farmerDad + 
            
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 + 
            Rtime6:SHI25 + Rtime7p:SHI25 + 
            
            Rtime5mp:SHI25:farmerDad + Rtime4m:SHI25:farmerDad + Rtime3m:SHI25:farmerDad + Rtime2m:SHI25:farmerDad +  
            Rtime1m:SHI25:farmerDad + Rtime1:SHI25:farmerDad + Rtime2:SHI25:farmerDad + Rtime3:SHI25:farmerDad + Rtime4:SHI25:farmerDad + 
            Rtime5:SHI25:farmerDad + Rtime6:SHI25:farmerDad + Rtime7p:SHI25:farmerDad 
          | 
            familyFE + Rtime | 0 | GISJOIN, 
          data = MigrantsData)

summary(m1)

out <- data.frame(t = c(-5:7),
                  beta = c(m1$beta[26:30], 0, m1$beta[31:37]),
                  c95u = c(m1$beta[26:30] + 1.96*m1$cse[26:30], 0, m1$beta[31:37] + 1.96*m1$cse[31:37]),
                  c95d = c(m1$beta[26:30] - 1.96*m1$cse[26:30], 0, m1$beta[31:37] - 1.96*m1$cse[31:37]))

plotDD(out = out, outfile = paste0(PaperFiguresPath, "/figure_3D.jpg"),
       u_limit = 5, d_limit = -10)

rm(out, m0)

# -------------------------------------------------------------------------
# ----------                  APPENDIX FIGURES                   ----------
# -------------------------------------------------------------------------


# Appendix Figure A.1 -----------------------------------------------------
# County-Level Soil Heterogeneity Index, 2000

plot_map(y = 2000, my.data = "SHIdata", my.col = "SHI25",
         my.title <- paste0("\nSoil Heterogeneity\n           Index\n"),
         outfile = paste0(AppendixFiguresPath, "/Figure_A_1.jpg"))

rm(SHIdata)


# Appendix Figures B.1-B.21 -----------------------------------------------
# Validating Close-knit measurements

FilePath <- paste0(DataPath, "/final/ValidationData.csv")
ValidateData <- fread(FilePath)

FilePath <- paste0(DataPath, "/final/ValidationDataStates.csv")
ValidateDataStates <- fread(FilePath)

#--- LNI ---
dep.var.list <- c("ICM_Native", "TNI_Native", 
                  "RHI", "MKP_Native",
                  "KPR_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Intra-Community Marriage", "Tight Norms Index",
                        "Religious Homogeneity Index", "Median Kin Propinquity",
                        "Kin Propinquity Rate","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("ICM", "TNI", "RHI", "MKP", "KPR", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "LNI_county_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/LNI_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Local Name Index")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "LNI_county_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/LNI_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Local Name Index")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "LNI_county_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/LNI_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Local Name Index")

#--- ICM ---
dep.var.list <- c("LNI_county_Native", "TNI_Native", 
                  "RHI", "MKP_Native",
                  "KPR_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Tight Norms Index",
                        "Religious Homogeneity Index", "Median Kin Propinquity",
                        "Kin Propinquity Rate","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "TNI", "RHI", "MKP", "KPR", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "ICM_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/ICM_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Intra-Community Marriage")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "ICM_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/ICM_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Intra-Community Marriage")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "ICM_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/ICM_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Intra-Community Marriage")


#--- TNI ---
dep.var.list <- c("LNI_county_Native", "ICM_Native", 
                  "RHI", "MKP_Native",
                  "KPR_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Intra-Community Marriage",
                        "Religious Homogeneity Index", "Median Kin Propinquity",
                        "Kin Propinquity Rate","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "ICM", "RHI", "MKP", "KPR", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "TNI_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/TNI_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Tight Norms Index")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "TNI_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/TNI_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Tight Norms Index")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "TNI_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/TNI_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Tight Norms Index")


#--- RHI ---
dep.var.list <- c("LNI_county_Native", "ICM_Native", 
                  "TNI_Native", "MKP_Native",
                  "KPR_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Intra-Community Marriage",
                        "Tight Norms Index", "Median Kin Propinquity",
                        "Kin Propinquity Rate","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "ICM", "TNI", "MKP", "KPR", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "RHI",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/RHI_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Religious Homogeneity Index")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score",
               my.indep.var = "RHI",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/RHI_Col_States.jpg"),
               my.y_lab = "Collectivism", my.x_lab = "Religious Homogeneity Index")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score",
               my.indep.var = "RHI",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/RHI_Tightness_State.jpg"),
               my.y_lab = "Cultural Tightness", my.x_lab = "Religious Homogeneity Index")

#--- MKP ---
dep.var.list <- c("LNI_county_Native", "ICM_Native", 
                  "TNI_Native", "RHI",
                  "KPR_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Intra-Community Marriage",
                        "Tight Norms Index", "Religious Homogeneity Index",
                        "Kin Propinquity Rate","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "ICM", "TNI", "RHI", "KPR", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "MKP_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/MKP_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Median Kin Propinquity")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "MKP_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/MKP_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Median Kin Propinquity")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "MKP_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/MKP_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Median Kin Propinquity")

#--- KPR ---
dep.var.list <- c("LNI_county_Native", "ICM_Native", 
                  "TNI_Native", "RHI",
                  "MKP_Native", "SFTI_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Intra-Community Marriage",
                        "Tight Norms Index", "Religious Homogeneity Index",
                        "Median Kin Propinquity","Strong Family Ties Index",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "ICM", "TNI", "RHI", "MKP", "SFTI",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "KPR_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/KPR_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Kin Propinquity Rate")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "KPR_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/KPR_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Kin Propinquity Rate")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "KPR_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/KPR_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Kin Propinquity Rate")

#--- SFTI ---
dep.var.list <- c("LNI_county_Native", "ICM_Native", 
                  "TNI_Native", "RHI",
                  "MKP_Native", "KPR_Native",
                  "communal_vs_universalist_values", "z_clustering_county", 
                  "z_support_ratio_county")
dep.var.label.list <- c("Local Name Index", "Intra-Community Marriage",
                        "Tight Norms Index", "Religious Homogeneity Index",
                        "Median Kin Propinquity","Kin Propinquity Rate",
                        "Rel. Importance of Communal Moral Values",
                        "Social Clustering", "Social Support Ratio")
file.name.add.list <- c("LNI", "ICM", "TNI", "RHI", "MKP", "KPR",
                        "uni_values", "clustering", "support_ratio")

for (i in 1:9) {
  OLSscatterPlot(my.data = ValidateData, my.dep.var = dep.var.list[i], 
                 my.indep.var = "SFTI_Native",
                 my.ctrls = "1 | STATE_TERR", my.cluster.var = "grid100m",
                 outfile = paste0(AppendixFiguresPath, "/outcomes_validation/SFTI_", file.name.add.list[i], ".jpg"), 
                 my.y_lab = dep.var.label.list[i], my.x_lab = "Strong Family Ties Index")
}

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "Col_Score", 
               my.indep.var = "SFTI_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/SFTI_Col_States.jpg"), 
               my.y_lab = "Collectivism", my.x_lab = "Strong Family Ties Index")

OLSscatterPlot(my.data = ValidateDataStates, my.dep.var = "tightness_Score", 
               my.indep.var = "SFTI_Native",
               my.ctrls = "1 | CENREG_name", my.cluster.var = NA,
               outfile = paste0(AppendixFiguresPath, "/outcomes_validation/SFTI_Tightness_State.jpg"), 
               my.y_lab = "Cultural Tightness", my.x_lab = "Strong Family Ties Index")


# Appendix Figure B.22 ----------------------------------------------------
# Mapping Close-Knit Communities: County-Level Measures, 1940

#--- Panel A: ICM, 1940 ---
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "ICM_Native",
         my.title <- paste0("\n Intra-Communal\n  Marriage, ", y,"\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22A.jpg"))

#--- Panel B: TNI, 1940 ---
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "TNI_Native",
         my.title <- paste0("\n      Tight Norms\n       Index, ", y,"\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22B.jpg"))

#--- Panel C: RHI, 1936 ---
y <- 1930
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "RHI",
         my.title <- paste0("\n       Religious\n    Homogeneity\n     Index, 1936\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22C.jpg"))

#--- Panel D: MKP, 1940 ---
y <- 1940
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "MKP_Native",
         my.title <- paste0("\n    Median Kin\nPropinquity, ", y,"\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22D.jpg"))

#--- Panel E: KPR, 1940 ---
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "KPR_Native",
         my.title <- paste0("\n   Kin Propinquity\n       Rate, ", y,"\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22E.jpg"))

#--- Panel F: SFTI, 1940 ---
plot_map(y = y, my.data = "CountyLevelData[YEAR==y]", my.col = "SFTI_Native",
         my.title <- paste0("\n     Strength of\nFamily Ties, ", y,"\n"),
         outfile = paste0(AppendixFiguresPath, "/figure_B_22F.jpg"))



# Appendix Figure C.1 -----------------------------------------------------
# OLS Bin Scatter Plots 

MyFilePath <- paste0(DataPath, "/final/CountyLevelData.csv")
CountyLevelData <- fread(MyFilePath, colClasses = my.colClasses)

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "LNI_county_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_LNI.jpg"), 
               my.y_lab = "Local Name Index", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "ICM_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_ICM.jpg"), 
               my.y_lab = "Intra-Community Marriage", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "TNI_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_TNI.jpg"), 
               my.y_lab = "Tight Norms Index", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "RHI", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_RHI.jpg"), 
               my.y_lab = "Religious Homogeneity Index", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "MKP_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_MKP.jpg"), 
               my.y_lab = "Median Kin Propinquity", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "KPR_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_KPR.jpg"), 
               my.y_lab = "Kin Propinquity Rate", my.x_lab = "Soil Heterogeneity")

OLSscatterPlot(my.data = CountyLevelData, my.dep.var = "SFTI_Native", my.indep.var = "SHI25",
               my.ctrls = paste0(my.SmoothLocCtrls, " | STATE_TERR:YEAR"), my.cluster.var = "grid100m",
               outfile = paste0(AppendixFiguresPath, "/figure_C_1_SFTI.jpg"), 
               my.y_lab = "Strength of Family Ties Index", my.x_lab = "Soil Heterogeneity")



# Appendix Figures C.2 ----------------------------------------------------
# The impact over time (LNI)

plotEffectTime(dep.var.name = "LNI_county_Native",
               yearsList = setdiff(seq.int(from = 1850, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_2.jpg"))

# Appendix Figures C.3 ----------------------------------------------------
# The Association between Soil Heterogeneity and Close-Knit Communities Over Time

plotEffectTime(dep.var.name = "ICM_Native",
               yearsList = setdiff(seq.int(from = 1850, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3A.jpg"))

plotEffectTime(dep.var.name = "TNI_Native",
               yearsList = setdiff(seq.int(from = 1850, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3B.jpg"))

plotEffectTime(dep.var.name = "RHI",
               yearsList = setdiff(seq.int(from = 1850, to = 1930, by = 10), 1880),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3C.jpg"))

plotEffectTime(dep.var.name = "MKP_Native",
               yearsList = setdiff(seq.int(from = 1850, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3D.jpg"))

plotEffectTime(dep.var.name = "KPR_Native",
               yearsList = setdiff(seq.int(from = 1850, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3E.jpg"))

plotEffectTime(dep.var.name = "SFTI_Native",
               yearsList = setdiff(seq.int(from = 1860, to = 1940, by = 10), 1890),
               outfile = paste0(AppendixFiguresPath, "/figure_C_3F.jpg"))


# Appendix Figure C.4 -----------------------------------------------------
# The Differential Impact is Not Driven by Age (DID)

m_f_age <- median(MigrantsData[, f_age], na.rm = T)

#--- Panel A: young ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[f_age <= m_f_age])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_4A.jpg"),
       u_limit = 5, d_limit = -10)


#--- Panel B: old ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[f_age > m_f_age])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_4B.jpg"),
       u_limit = 5, d_limit = -10)



# Appendix Figure C.5 -----------------------------------------------------
# The Differential Impact is Not Driven by a Different Births Profile (DID)

t0 <- data.table(familyFE = rep(unique(MigrantsData$familyFE), 20),
                 Rtime = rep(-9:10, length(unique(MigrantsData$familyFE))))

t1 <- MigrantsData[, .(GISJOIN, familyFE, Rtime, LNI_state_Native,
                       SHI25, farmerDad, sex)]

t <- merge(t0, t1, by = c("familyFE", "Rtime"), all = T)
rm(t0, t1)

# Births
t[, birth := 0]
t[!is.na(LNI_state_Native), birth := 1]

t[sex==1, boy := 1]
t[sex==2, boy := 0]

t <- t[, ':=' (Rtime4m = ifelse(Rtime==-4,1,0),
               Rtime3m = ifelse(Rtime==-3,1,0),
               Rtime2m = ifelse(Rtime==-2,1,0),
               Rtime1m = ifelse(Rtime==-1,1,0),
               Rtime6 = ifelse(Rtime==6,1,0),
               Rtime5 = ifelse(Rtime==5,1,0),
               Rtime4 = ifelse(Rtime==4,1,0),
               Rtime3 = ifelse(Rtime==3,1,0),
               Rtime2 = ifelse(Rtime==2,1,0),
               Rtime1 = ifelse(Rtime==1,1,0),
               Rtime0 = ifelse(Rtime==0,1,0),
               Rtime5mp = ifelse(Rtime < -4,1,0),
               Rtime7p = ifelse(Rtime > 6,1,0))]

t <- t[, Rtime := as.factor(Rtime)]

#--- Panel A: Farmers, birth probability ---
m0 <-felm(birth ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = t[farmerDad==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_5A.jpg"),
       u_limit = .075, d_limit = -.075)

#--- Panel B: Non-farmers, birth probability ---
m0 <-felm(birth ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = t[farmerDad==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_5B.jpg"),
       u_limit = .075, d_limit = -.075)



#--- Panel C: Farmers, child’s gender is male ----
m0 <-felm(boy ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = t[farmerDad==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_5C.jpg"),
       u_limit = .2, d_limit = -.2)

#--- Panel D: Non-Farmers, child’s gender is male ---
m0 <-felm(boy ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = t[farmerDad==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_5D.jpg"),
       u_limit = .2, d_limit = -.2)


# Appendix Figure C.6 -----------------------------------------------------
# The Differential Impact is Not Driven by Rural vs. Urban Locations (DID)

#--- Panel A: Non-farmers, rural ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & urban==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_6A.jpg"),
       u_limit = 10, d_limit = -15)

#--- Panel B: non-farmer, urban ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & urban==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_6B.jpg"),
       u_limit = 10, d_limit = -15)


# Appendix Figure C.7 -----------------------------------------------------
# Heterogeneous Impact on Farmers by Prior Communalism (DID)

#--- Panel A: Farmers, High Prior Communal Identification ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & highLNI==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_7A.jpg"),
       u_limit = 5, d_limit = -10)



#--- Panel  B: Farmers, Low Prior Communal Identification ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & highLNI==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_7B.jpg"),
       u_limit = 5, d_limit = -10)


#--- Panel  C: Farmers, Intra-Community Marriage ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & ICM==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_7C.jpg"),
       u_limit = 5, d_limit = -10)



#--- Panel D: Farmers, Extra-Community Marriage ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & ICM==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_7D.jpg"),
       u_limit = 5, d_limit = -10)


# Appendix Figure C.8 -----------------------------------------------------
# Heterogeneous Impact on Non-Farmers by Prior Communalism (DID)

#--- Panel A: Non-Farmers, High Prior Communal Identification ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & highLNI==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_8A.jpg"),
       u_limit = 10, d_limit = -10)


#--- Panel  B: Non-Farmers, Low Prior Communal Identification ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & highLNI==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_8B.jpg"),
       u_limit = 10, d_limit = -10)


#--- Panel  C: Non-Farmers, Intra-Community Marriage ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & ICM==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_8C.jpg"),
       u_limit = 10, d_limit = -10)



#--- Panel D: Non-Farmers, Extra-Community Marriage ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==0 & ICM==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))


plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_C_8D.jpg"),
       u_limit = 10, d_limit = -10)


# Appendix Figure C.9 -----------------------------------------------------
# The Impact of Other Geo-climatic Characteristics

MyFilePath <- paste0(DataPath, "/final/MigrantsChildrenData.csv")
MigrantsChildren <- fread(MyFilePath)

geo.list <- c("SHI25", "Temperature", "Precipitation", "Slope", "Elevation", "Flow_acc", 
              "maxval", "RiverDensity", "SHAPE_AREA", "dist2NavRiver",
              "dist2Lakes", "dist2Shoreline")

geo.title.list <- c("Soil heterogeneity", "Temperature", "Precipitation", "Slope", "Elevation", 
                    "Flow accumulation", "Agricultural productivity", "River density", 
                    "Total area", "Distance to rivers", "Distance to lakes", "Distance to shore")

for (i in 1:length(geo.list)) {
  if(i %% 4 == 0) {
    plotGeoCoefs(my.geo = geo.list[i], my.geo.title = geo.title.list[i], last.geo = T)
  } else {
    plotGeoCoefs(my.geo = geo.list[i], my.geo.title = geo.title.list[i], last.geo = F)
  }
}


# Figure D.1 --------------------------------------------------------------
# Robustness to Different Standard Errors

#--- Panel A: LNI ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = LNI_county_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

ClusterVarList = c("grid25m", "grid50m", "grid75m", "grid100m", "grid125m",
                   "grid150m", "grid175m", "grid200m", "grid225m", "grid250m")

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1A.jpg")
clusterSErobustness(dep.var.name = "LNI_county_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)



#--- Panel B: ICM ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = ICM_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1B.jpg")
clusterSErobustness(dep.var.name = "ICM_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)


#--- Panel C: TNI ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = TNI_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1C.jpg")
clusterSErobustness(dep.var.name = "TNI_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)



#--- Panel D: RHI ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = RHI ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1D.jpg")
clusterSErobustness(dep.var.name = "RHI",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)


#--- Panel E: MKP ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = MKP_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1E.jpg")
clusterSErobustness(dep.var.name = "MKP_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)


#--- Panel F: KPR ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = KPR_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1F.jpg")
clusterSErobustness(dep.var.name = "KPR_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)


#--- Panel G: SFTI ---
conleyPARs <- list(myVar = "SHI25",
                   list(formula = SFTI_Native ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = CountyLevelData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 100))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_1G.jpg")
clusterSErobustness(dep.var.name = "SFTI_Native",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = CountyLevelData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)

# Appendix Figure D.2 -----------------------------------------------------
# Adding Individual Controls to DID Estimates

#--- Panel A: Farmers ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  
            Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime + sex + birthOrder + cohortFE5Y | 0 | GISJOIN, 
          data = MigrantsData[farmerDad==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_2A.jpg"),
       u_limit = 5, d_limit = -10)

#--- Panel B: Non-Farmers ---
m0 <-felm(LNI_state_Native ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  
            Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime + sex + birthOrder + cohortFE5Y | 0 | GISJOIN, 
          data = MigrantsData[farmerDad==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_2B.jpg"),
       u_limit = 5, d_limit = -10)

rm(out, m0)

# Appendix Figure D.3 -----------------------------------------------------
# Heterogeneous Impact on Farmers by Prior Communal Identification, logs (DID)

#--- Panel A: Farmers, High Prior Communal Identification (log) ---
m0 <-felm(log(LNI_state_Native) ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & highLNI==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_3A.jpg"),
       u_limit = .1, d_limit = -.25)



#--- Panel B: Farmers, Low Prior Communal Identification (log) ---
m0 <-felm(log(LNI_state_Native) ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & highLNI==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_3B.jpg"),
       u_limit = .1, d_limit = -.25)



#--- Panel C: Farmers, Intra-Community Marriage (log) ---
m0 <-felm(log(LNI_state_Native) ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & ICM==1])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_3C.jpg"),
       u_limit = .1, d_limit = -.25)



#--- Panel D: Farmers, Extra-Community Marriage (log) ---
m0 <-felm(log(LNI_state_Native) ~ 
            Rtime5mp:SHI25 + Rtime4m:SHI25 + Rtime3m:SHI25 + Rtime2m:SHI25 +  Rtime1m:SHI25 +  
            Rtime1:SHI25 + Rtime2:SHI25 + Rtime3:SHI25 + Rtime4:SHI25 + Rtime5:SHI25 +
            Rtime6:SHI25 + Rtime7p:SHI25 | 
            familyFE + Rtime | 0 | GISJOIN, data = MigrantsData[farmerDad==1 & ICM==0])
summary(m0)

out <- data.frame(t = c(-5:7),
                  beta = c(m0$beta[1:5], 0, m0$beta[6:12]),
                  c95u = c(m0$beta[1:5] + 1.96*m0$cse[1:5], 0, m0$beta[6:12] + 1.96*m0$cse[6:12]),
                  c95d = c(m0$beta[1:5] - 1.96*m0$cse[1:5], 0, m0$beta[6:12] - 1.96*m0$cse[6:12]))

plotDD(out = out, outfile = paste0(AppendixFiguresPath, "/figure_D_3D.jpg"),
       u_limit = .1, d_limit = -.25)


# Appendix Figure D.4 -----------------------------------------------------
# Social Learning: Robustness to Different Standard Errors

#--- Growth in Fertilizers Use ---
FilePath <- paste0(DataPath, "/final/FertilizerData.csv")
FertilizerData <- fread(FilePath, colClasses = my.colClasses)

conleyPARs <- list(myVar = "SHI25",
                   list(formula = g_shareFertilizer ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = FertilizerData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 30))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_4A.jpg")
clusterSErobustness(dep.var.name = "g_shareFertilizer",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = FertilizerData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)


#--- Growth in Wheat Share ---
FilePath <- paste0(DataPath, "/final/WheatShareData.csv")
WheatShareData <- fread(FilePath, colClasses = my.colClasses)
conleyPARs <- list(myVar = "SHI25",
                   list(formula = g_shareWheat ~ SHI25 + Elevation + Slope + Flow_acc + Precipitation + Temperature + RiverDensity + maxval + SHAPE_AREA + dist2NavRiver + dist2Lakes + dist2Shoreline + XCoord + YCoord + XCoord2 + YCoord2 + XYCoord | STATE_TERRxYEAR,
                        data = WheatShareData,
                        dist_cutoff = 500*mile2km, 
                        model = "ols",
                        unit = "countyID",
                        time = "year",
                        lat = "lat",
                        lon = "lon",
                        kernel = "bartlett", 
                        lag_cutoff = 65))

# saving figure
FilePath <- paste0(AppendixFiguresPath, "/figure_D_4B.jpg")
clusterSErobustness(dep.var.name = "g_shareWheat",
                    indep.var.name = "SHI25",
                    FEs = "STATE_TERR:YEAR",
                    ClusterVarList = ClusterVarList,
                    reg.data = WheatShareData, 
                    outfile = FilePath, 
                    robust = TRUE, 
                    countyCluster = TRUE, 
                    stateCluster = TRUE,
                    conleySEs = TRUE, 
                    conleyPARs = conleyPARs)



# Clear -------------------------------------------------------------------

rm(list = ls())
gc()